How to make a good rendering table:
| column1 | column2 | column3 |
|---|---|---|
| 1 | 2 | 3 |
| a | b | c |
# Chunk options: show code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Record the script start time so total run time can be reported at the end.
StartTime <- Sys.time()

# 8-digit date tag (YYYYMMDD), used to prefix output file names.
# (format() replaces the original substr(gsub(...)) detour; same result.)
Date <- format(Sys.time(), "%Y%m%d")

# libraries:
library(dplyr)
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
##
## first, second
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
##
## shift
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
##
## Attaching package: 'Laurae'
## The following object is masked from 'package:matrixStats':
##
## mean2
## The following object is masked from 'package:data.table':
##
## setDF
## Loading required package: magrittr
Functions used throughout this script.
# Local copy of stats::p.adjust (masks the stats version while this file is
# sourced). The only functional change relative to the stats original is that
# the `stopifnot(n >= lp)` sanity check is commented out, so an `n` smaller
# than length(p) is silently accepted — presumably deliberate here, but note
# the adjusted values are then not standard multiple-testing corrections.
# TODO(review): confirm why the check was removed.
#
# Args:
#   p:      numeric vector of p-values (may be named, may contain NAs).
#   method: one of p.adjust.methods; "fdr" is an alias for "BH".
#   n:      number of comparisons; defaults to length(p).
#
# Returns: adjusted p-values, same length and names as `p`; NAs preserved.
p.adjust <- function(p, method = p.adjust.methods, n = length(p))
{
## Methods 'Hommel', 'BH', 'BY' and speed improvements
## contributed by Gordon Smyth
method <- match.arg(method)
if(method == "fdr") method <- "BH" # back compatibility
nm <- names(p)
p <- as.numeric(p)
p0 <- setNames(p, nm)
# nna marks non-NA entries; when nothing is missing it collapses to the
# scalar TRUE so that p0[nna] below assigns the whole vector at once.
if(all(nna <- !is.na(p))) nna <- TRUE
p <- p[nna]
lp <- length(p)
# stopifnot(n >= lp)
if (n <= 1) return(p0)
if (n == 2 && method == "hommel") method <- "hochberg"
p0[nna] <-
switch(method,
bonferroni = pmin(1, n * p),
# Holm step-down: scale sorted p-values by decreasing factors, force
# monotonicity with cummax, then undo the sort via ro.
holm = {
i <- seq_len(lp)
o <- order(p)
ro <- order(o)
pmin(1, cummax( (n - i + 1L) * p[o] ))[ro]
},
hommel = { ## needs n-1 >= 2 in for() below
# Pad with 1s when n > lp so the closed-form recursion runs over n values.
if(n > lp) p <- c(p, rep.int(1, n-lp))
i <- seq_len(n)
o <- order(p)
p <- p[o]
ro <- order(o)
q <- pa <- rep.int( min(n*p/i), n)
for (j in (n-1):2) {
ij <- seq_len(n-j+1)
i2 <- (n-j+2):n
q1 <- min(j*p[i2]/(2:j))
q[ij] <- pmin(j*p[ij], q1)
q[i2] <- q[n-j+1]
pa <- pmax(pa,q)
}
# Drop the padding (if any) when mapping back to the original order.
pmax(pa,p)[if(lp < n) ro[1:lp] else ro]
},
# Hochberg step-up: like Holm but scanning from the largest p downward
# with cummin.
hochberg = {
i <- lp:1L
o <- order(p, decreasing = TRUE)
ro <- order(o)
pmin(1, cummin( (n - i + 1L) * p[o] ))[ro]
},
# Benjamini-Hochberg FDR.
BH = {
i <- lp:1L
o <- order(p, decreasing = TRUE)
ro <- order(o)
pmin(1, cummin( n / i * p[o] ))[ro]
},
# Benjamini-Yekutieli: BH with the harmonic-sum correction factor q.
BY = {
i <- lp:1L
o <- order(p, decreasing = TRUE)
ro <- order(o)
q <- sum(1L/(1L:n))
pmin(1, cummin(q * n / i * p[o]))[ro]
},
none = p)
p0
}
SetFileName <- function(filename, initials, date = Date) {
  # Build an output file name of the form <initials><date><filename>.
  #
  # Args:
  #   filename: file name with extension, e.g. "ratios.pdf".
  #   initials: author-initials prefix, e.g. "mt".
  #   date:     8-digit date tag; defaults to the global `Date` set at the
  #             top of this script (new optional argument, backward compatible).
  #
  # Returns: the combined file name as a character string.
  #
  # BUG FIX(review): the original called substitute() on both arguments,
  # which captures the *unevaluated expression*. With string literals that
  # is a no-op, but when called with a variable it pasted the variable's
  # NAME instead of its value. Evaluating the arguments normally fixes that.
  paste0(initials, date, filename)
}
# From Fede:
# ggpairs custom functions

# Upper-panel function for GGally::ggpairs(): prints the Pearson correlation
# coefficient, with text size proportional to |r|, on a background colour
# encoding correlation strength via the global `boundaries` cut-points.
# Depends on eval_data_col()/ggally_text() (GGally) and ggplot2 being attached,
# and on a numeric vector `boundaries` of length >= 4 existing in the caller's
# environment — TODO(review): pass `boundaries` as an argument instead.
corColor <- function(data, mapping, color = I("black"), sizeRange = c(1, 3), ...) {
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  # NOTE(review): cor() without use= yields NA if x or y contain NAs —
  # confirm inputs are complete or add use = "pairwise.complete.obs".
  r <- cor(x, y)
  rt <- format(r, digits = 3)
  tt <- as.character(rt)
  cex <- max(sizeRange)

  # Helper: map a fraction into a usable size within `range`.
  percent_of_range <- function(percent, range) {
    percent * diff(range) + min(range, na.rm = TRUE)
  }

  # Plot the correlation coefficient, sized by |r|.
  p <- ggally_text(label = tt, mapping = aes(), xP = 0.5, yP = 0.5,
                   size = I(percent_of_range(cex * abs(r), sizeRange)), color = color, ...) +
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank())

  # Background colour from a diverging palette, picked by the interval of
  # `boundaries` that r falls into.
  corColors <- RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")[2:6]
  if (r <= boundaries[1]) {
    corCol <- corColors[1]
  } else if (r <= boundaries[2]) {
    # BUG FIX(review): the original tested boundaries[3] here, which made the
    # following branch (r < boundaries[3]) unreachable; boundaries[2] was
    # clearly intended for a monotone cut-point ladder.
    corCol <- corColors[2]
  } else if (r < boundaries[3]) {
    corCol <- corColors[3]
  } else if (r < boundaries[4]) {
    corCol <- corColors[4]
  } else {
    corCol <- corColors[5]
  }
  p <- p +
    theme(panel.background = element_rect(fill = corCol))
  return(p)
}
# stat_smooth_func <- function(mapping = NULL, data = NULL,
# geom = "smooth", position = "identity",
# ...,
# method = "auto",
# formula = y ~ x,
# se = TRUE,
# n = 80,
# span = 0.75,
# fullrange = FALSE,
# level = 0.95,
# method.args = list(),
# na.rm = FALSE,
# show.legend = NA,
# inherit.aes = TRUE,
# xpos = NULL,
# ypos = NULL) {
# layer(
# data = data,
# mapping = mapping,
# stat = StatSmoothFunc,
# geom = geom,
# position = position,
# show.legend = show.legend,
# inherit.aes = inherit.aes,
# params = list(
# method = method,
# formula = formula,
# se = se,
# n = n,
# fullrange = fullrange,
# level = level,
# na.rm = na.rm,
# method.args = method.args,
# span = span,
# xpos = xpos,
# ypos = ypos,
# ...
# )
# )
# }
#
# StatSmoothFunc <- ggproto("StatSmooth", Stat,
#
# setup_params = function(data, params) {
# # Figure out what type of smoothing to do: loess for small datasets,
# # gam with a cubic regression basis for large data
# # This is based on the size of the _largest_ group.
# if (identical(params$method, "auto")) {
# max_group <- max(table(data$group))
#
# if (max_group < 1000) {
# params$method <- "loess"
# } else {
# params$method <- "gam"
# params$formula <- y ~ s(x, bs = "cs")
# }
# }
# if (identical(params$method, "gam")) {
# params$method <- mgcv::gam
# }
#
# params
# },
#
# compute_group = function(data, scales, method = "auto", formula = y~x,
# se = TRUE, n = 80, span = 0.75, fullrange = FALSE,
# xseq = NULL, level = 0.95, method.args = list(),
# na.rm = FALSE, xpos=NULL, ypos=NULL) {
# if (length(unique(data$x)) < 2) {
# # Not enough data to perform fit
# return(data.frame())
# }
#
# if (is.null(data$weight)) data$weight <- 1
#
# if (is.null(xseq)) {
# if (is.integer(data$x)) {
# if (fullrange) {
# xseq <- scales$x$dimension()
# } else {
# xseq <- sort(unique(data$x))
# }
# } else {
# if (fullrange) {
# range <- scales$x$dimension()
# } else {
# range <- range(data$x, na.rm = TRUE)
# }
# xseq <- seq(range[1], range[2], length.out = n)
# }
# }
# # Special case span because it's the most commonly used model argument
# if (identical(method, "loess")) {
# method.args$span <- span
# }
#
# if (is.character(method)) method <- match.fun(method)
#
# base.args <- list(quote(formula), data = quote(data), weights = quote(weight))
# model <- do.call(method, c(base.args, method.args))
#
# m = model
# eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
# list(a = format(coef(m)[1], digits = 3),
# b = format(coef(m)[2], digits = 3),
# r2 = format(summary(m)$r.squared, digits = 3)))
# func_string = as.character(as.expression(eq))
#
# if(is.null(xpos)) xpos = min(data$x)*0.9
# if(is.null(ypos)) ypos = max(data$y)*0.9
# data.frame(x=xpos, y=ypos, label=func_string)
#
# },
#
# required_aes = c("x", "y")
# )
setwd("/DATA/usr/m.trauernicht/projects/EpiScreen/data/files_scripts")
# Import data from the preprocessing script.
RSTP2_2000_indels.df <- get(load("mt20190501_RSTP2_2000_indels.df"))

# NOTE(review): the column positions below are hard-coded against the
# preprocessing output — confirm they still match if that script changes.
indel.data <- RSTP2_2000_indels.df[, c(1:6, 48, 55, 56, 120:125, 128)]
indel.data.all <- RSTP2_2000_indels.df[, c(1:6, 48:56, 120:125, 128)]

# Barcode-to-integration mapping (header = TRUE spelled out; never T/F).
integrations <- read.table("barcode_order.txt", header = TRUE)

# Import viability data and the mean +1/-7 ratio from previous experiments.
viability.data <- read.delim("mt20190124_viabilityData")
viability <- read.csv2("EpiscreenAllPlates.csv")
# Load the per-barcode mean +1/-7 ratios for later correlation check-ups.
mean.ratio.data <- read.delim("mt20190124_meanratiosperbarcode")## Warning in melt(viability): The melt generic in data.table has been passed
## a data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(viability). In the next version, this warning will become an
## error.
## Using X as id variables
# NOTE(review): `viability.df` is used below but never created in this file;
# the warning output above shows it presumably came from
# `viability.df <- melt(viability)` in the original document — confirm and
# restore that assignment.
# Normalize raw viability to the per-plate maximum (one column per plate).
viability.df$value <- ave(viability.df$value, viability.df$variable, FUN = function(x) x/max(x))
# Plot a plate-layout heat map of relative viability for each plate
# (raw_grid() presumably comes from the platetools package — verify).
for (i in unique(viability.df$variable)) {
p <- raw_grid(data = viability.df$value[viability.df$variable == i],
well = viability.df$X[viability.df$variable == i],
plate_id = viability.df$variable[viability.df$variable == i])
print(p)
}# Add viability data to indel.data
# Add viability data to indel.data (inner merge on the shared key columns).
indel.data <- merge(viability.data, indel.data)
setnames(indel.data, old = "V2", new = "rn")

# Add previously estimated +1/-7 ratio data for correlation check-ups;
# all = TRUE (spelled out; never T) keeps rows present in only one table.
indel.data <- merge(mean.ratio.data, indel.data, all = TRUE)
all.indel.data <- indel.data

# Rename columns.
setnames(indel.data, old = ".id", new = "condition")
setnames(indel.data, old = "rn", new = "barcode")

# Remove barcodes with abnormal counts from the analysis.
# NOTE(review): hard-coded outlier barcodes — document where they came from.
indel.data <- indel.data[indel.data$barcode != "ACCCCTAAAGGCGCTG" &
                         indel.data$barcode != "CTCTTAATCGCTGCC", ]

# Set a threshold on minimum +1 and -7 read counts to exclude noisy data.
# NOTE(review): rows with read.count1.7 exactly 30 land in neither subset
# (< 30 here, > 30 below) — confirm whether >= was intended on one side.
low.indel.count <- indel.data[indel.data$read.count1.7 < 30, ]
low.indel.count <- low.indel.count[low.indel.count$X0 > 100, ]
low.indel.count <- low.indel.count[!low.indel.count$Drug %in% "PAO", ]
indel.data <- indel.data[indel.data$read.count1.7 > 30, ]

# Remove zeros, NAs and Infs so the log2 transform below is always defined.
indel.data <- indel.data[is.finite(indel.data$ratio), ]
indel.data <- indel.data[indel.data$ratio != 0, ]
# Add a plate specifier derived from the condition string.
# Plates are encoded as <conc>_rep<r>_plate<p> in `condition`; rows matching
# none of the patterns below (i.e. 10um_rep1_plate1) keep the default "1".
indel.data$plate.nr <- "1"
plate.map <- c(
  "10um_rep1_plate2"  = "2",  "10um_rep2_plate1"  = "3",
  "10um_rep2_plate2"  = "4",  "10um_rep3_plate1"  = "5",
  "10um_rep3_plate2"  = "6",  "1um_rep1_plate1"   = "7",
  "1um_rep1_plate2"   = "8",  "1um_rep2_plate1"   = "9",
  "1um_rep2_plate2"   = "10", "1um_rep3_plate1"   = "11",
  "1um_rep3_plate2"   = "12", "100nm_rep1_plate1" = "13",
  "100nm_rep1_plate2" = "14", "100nm_rep2_plate1" = "15",
  "100nm_rep2_plate2" = "16", "100nm_rep3_plate1" = "17",
  "100nm_rep3_plate2" = "18"
)
for (pattern in names(plate.map)) {
  # grepl + logical subsetting is a no-op when a pattern matches no rows,
  # whereas the original `df[grep(...), ]$col <- value` errors on an empty
  # match. Patterns are applied in the same order as the original lines.
  indel.data$plate.nr[grepl(pattern, indel.data$condition)] <- plate.map[[pattern]]
}
## Compute some useful entities for data analysis

# Take log2 of the +1/-7 ratio. log2() is already vectorised; the original
# wrapped it in ave() with no grouping variable, which is an identity detour.
indel.data$logratio <- log2(indel.data$ratio)

# Compute viability relative to the per-plate maximum.
indel.data$viability <- ave(indel.data$viability, indel.data$plate.nr, FUN = function(x) x/max(x))

# Exclude reads from cells with reduced viability (cut-off: 45% reduction).
indel.data <- indel.data[indel.data$viability > 0.45, ]

## Add some more classifiers

# Strip the replicate tag ("_rep<N>") from the condition string.
# BUG FIX(review): the original replacement "\\1" referenced a capture group
# that the pattern "_rep[0-9]" does not define; an empty replacement is what
# deleting the tag requires.
indel.data$classifier <- gsub("_rep[0-9]", "", indel.data$condition)
indel.data$uniqueID <- paste(indel.data$barcode, indel.data$classifier)

# Optional: exclude a datapoint entirely (remove the remaining one) if 2 of
# 3 replicates were already excluded up to this point.
## indel.data <- indel.data[indel.data$uniqueID %in% names(which(table(indel.data$uniqueID) > 1)), ]## Normalize the data
# Plate normalization: subtract the per-plate, per-barcode mean log2(DMSO)
# ratio from every log2(drug) ratio of that plate/barcode combination.
# (plate.nr is character; comparing with integer i coerces i, as before.)
for (i in 1:18) {
  for (j in unique(indel.data$barcode)) {
    # Hoist the row mask: it drives both the DMSO lookup and the write below.
    sel <- indel.data$plate.nr == i & indel.data$barcode == j
    # NOTE(review): if a plate/barcode combination has no DMSO rows, mean()
    # of the empty vector is NaN and the whole combination becomes NaN.
    dmso.mean <- mean(indel.data$logratio[indel.data$Drug == "DMSO" & sel])
    indel.data$logratio.platenorm[sel] <- indel.data$logratio[sel] - dmso.mean
  }
}
# Plate normalization for cutting efficiency, analogous to the ratio
# normalization above: subtract the per-plate, per-barcode DMSO mean.
for (i in 1:18) {
  for (j in unique(indel.data$barcode)) {
    # Single mask reused for the DMSO lookup and the write.
    sel <- indel.data$plate.nr == i & indel.data$barcode == j
    dmso.eff.mean <- mean(indel.data$efficiency[indel.data$Drug == "DMSO" & sel])
    indel.data$efficiency.platenorm[sel] <- indel.data$efficiency[sel] - dmso.eff.mean
  }
}
## Perform t-tests - save in different file

# t-test for target groups: Aurora Kinase vs. negative control.
# Parentheses added to make the original operator precedence explicit
# (& binds tighter than |): Aurora rows at ANY concentration, plus DMSO rows
# at 1um only. NOTE(review): confirm this asymmetry is intended.
indel.data.aurora <- indel.data[indel.data$Target == "Aurora Kinase" |
                                (indel.data$Drug == "DMSO" &
                                 indel.data$conc == "1um"), ]

# Mean normalized log-ratio per target and replicate.
indel.data.aurora$logratio.drug <- ave(indel.data.aurora$logratio.platenorm,
                                       indel.data.aurora$Target,
                                       indel.data.aurora$rep,
                                       FUN = mean)
indel.data.aurora <- indel.data.aurora %>% select(Target, rep, logratio.drug) %>% unique()
indel.data.aurora$mean <- ave(indel.data.aurora$logratio.drug, indel.data.aurora$Target, FUN = mean)
indel.data.aurora$sd <- ave(indel.data.aurora$logratio.drug, indel.data.aurora$Target, FUN = sd)

for (i in unique(indel.data.aurora$Target)) {
  # NOTE(review): x and y are fixed to "Negative Control" vs. "Aurora Kinase"
  # regardless of i, so every Target receives the same statistics — fine while
  # these are the only groups present, but fragile if more targets appear.
  x <- indel.data.aurora[indel.data.aurora$Target == "Negative Control", ]
  y <- indel.data.aurora[indel.data.aurora$Target == "Aurora Kinase", ]
  if (nrow(y) > 1) {
    # Run the t-test once and reuse the fitted object; the original
    # recomputed the identical test four times per iteration.
    tt <- t.test(x$logratio.drug, y$logratio.drug)
    indel.data.aurora$p.value[indel.data.aurora$Target == i] <- tt$p.value
    indel.data.aurora$statistic[indel.data.aurora$Target == i] <- tt$statistic
    indel.data.aurora$conf.int1[indel.data.aurora$Target == i] <- tt$conf.int[1]
    indel.data.aurora$conf.int2[indel.data.aurora$Target == i] <- tt$conf.int[2]
  }
}

# Select relevant data
indel.data.aurora <- indel.data.aurora %>% select(Target, mean, sd, p.value) %>% unique()
# t-test for control drugs: DNA-PKi and Mirin, each against DMSO.
indel.data.controls <- indel.data[indel.data$Drug == "DNA-PKi" |
                                  indel.data$Drug == "DMSO" |
                                  indel.data$Drug == "Mirin", ]

# Mean normalized log-ratio per drug and replicate.
indel.data.controls$logratio.drug <- ave(indel.data.controls$logratio.platenorm,
                                         indel.data.controls$Drug,
                                         indel.data.controls$rep, FUN = mean)
indel.data.controls <- indel.data.controls %>% select(Drug, rep, logratio.drug) %>% unique()
indel.data.controls$mean <- ave(indel.data.controls$logratio.drug, indel.data.controls$Drug, FUN = mean)
indel.data.controls$sd <- ave(indel.data.controls$logratio.drug, indel.data.controls$Drug, FUN = sd)

for (i in unique(indel.data.controls$Drug)) {
  x <- indel.data.controls[indel.data.controls$Drug == "DMSO", ]
  y <- indel.data.controls[indel.data.controls$Drug == i, ]
  if (nrow(y) > 1) {
    # Run the t-test once per drug instead of four identical times.
    tt <- t.test(x$logratio.drug, y$logratio.drug)
    indel.data.controls$p.value[indel.data.controls$Drug == i] <- tt$p.value
    indel.data.controls$statistic[indel.data.controls$Drug == i] <- tt$statistic
    indel.data.controls$conf.int1[indel.data.controls$Drug == i] <- tt$conf.int[1]
    indel.data.controls$conf.int2[indel.data.controls$Drug == i] <- tt$conf.int[2]
  }
}

# Select relevant data
indel.data.controls <- indel.data.controls %>% select(Drug, mean, sd, p.value) %>% unique()
# t-test for individual drugs vs. DMSO, separately per concentration.
indel.data.drugs <- indel.data

# Mean normalized log-ratio per drug, concentration and replicate.
# FIX(review): the original grouped on indel.data$conc instead of
# indel.data.drugs$conc; values are identical here (plain copy), but
# referencing the copied frame keeps the code self-consistent.
indel.data.drugs$logratio.drug <- ave(indel.data.drugs$logratio.platenorm,
                                      indel.data.drugs$Drug, indel.data.drugs$conc,
                                      indel.data.drugs$rep, FUN = mean)
indel.data.drugs <- indel.data.drugs %>% select(Drug, rep, conc, logratio.drug) %>% unique()
indel.data.drugs$mean <- ave(indel.data.drugs$logratio.drug, indel.data.drugs$Drug, indel.data.drugs$conc, FUN = mean)
indel.data.drugs$sd <- ave(indel.data.drugs$logratio.drug, indel.data.drugs$Drug, indel.data.drugs$conc, FUN = sd)

for (j in unique(indel.data.drugs$conc)) {
  for (i in unique(indel.data.drugs$Drug)) {
    x <- indel.data.drugs[indel.data.drugs$Drug == "DMSO" & indel.data.drugs$conc == j, ]
    y <- indel.data.drugs[indel.data.drugs$Drug == i & indel.data.drugs$conc == j, ]
    if (nrow(y) > 1) {
      # One t.test call per drug/concentration (was four identical calls);
      # the write mask is also hoisted out of the four assignments.
      tt <- t.test(x$logratio.drug, y$logratio.drug)
      sel <- indel.data.drugs$Drug == i & indel.data.drugs$conc == j
      indel.data.drugs$p.value[sel] <- tt$p.value
      indel.data.drugs$statistic[sel] <- tt$statistic
      indel.data.drugs$conf.int1[sel] <- tt$conf.int[1]
      indel.data.drugs$conf.int2[sel] <- tt$conf.int[2]
    }
  }
}

# Select relevant data
indel.data.drugs <- indel.data.drugs %>% select(Drug, conc, mean, sd, p.value) %>% unique()
# t-test for each drug, treating integrations (barcodes) as replicates,
# separately per concentration.
indel.data.drugs.int <- indel.data

# Mean normalized log-ratio per drug, concentration, replicate and barcode.
# FIX(review): the original grouped on indel.data$conc — identical values
# (plain copy), but the copied frame is referenced for consistency.
indel.data.drugs.int$logratio.drug <- ave(indel.data.drugs.int$logratio.platenorm,
                                          indel.data.drugs.int$Drug, indel.data.drugs.int$conc,
                                          indel.data.drugs.int$rep, indel.data.drugs.int$barcode,
                                          FUN = mean)
# NOTE(review): the grouping above includes barcode, but the select() below
# keeps classifier instead — confirm that is intentional.
indel.data.drugs.int <- indel.data.drugs.int %>% select(Drug, rep, conc, classifier, logratio.drug) %>%
  unique()
indel.data.drugs.int$mean <- ave(indel.data.drugs.int$logratio.drug, indel.data.drugs.int$Drug, indel.data.drugs.int$conc, FUN = mean)
indel.data.drugs.int$sd <- ave(indel.data.drugs.int$logratio.drug, indel.data.drugs.int$Drug, indel.data.drugs.int$conc, FUN = sd)

for (j in unique(indel.data.drugs.int$conc)) {
  for (i in unique(indel.data.drugs.int$Drug)) {
    x <- indel.data.drugs.int[indel.data.drugs.int$Drug == "DMSO" & indel.data.drugs.int$conc == j, ]
    y <- indel.data.drugs.int[indel.data.drugs.int$Drug == i & indel.data.drugs.int$conc == j, ]
    if (nrow(y) > 1) {
      # One t.test call per drug/concentration (was four identical calls).
      tt <- t.test(x$logratio.drug, y$logratio.drug)
      sel <- indel.data.drugs.int$Drug == i & indel.data.drugs.int$conc == j
      indel.data.drugs.int$p.value[sel] <- tt$p.value
      indel.data.drugs.int$statistic[sel] <- tt$statistic
      indel.data.drugs.int$conf.int1[sel] <- tt$conf.int[1]
      indel.data.drugs.int$conf.int2[sel] <- tt$conf.int[2]
    }
  }
}

# Adjust p-values for multiple testing and select relevant data.
# BUG FIX(review): the original computed p.value.adjust and then immediately
# dropped it in the select() below — the adjusted column is now kept.
indel.data.drugs.int$p.value.adjust <- p.adjust(indel.data.drugs.int$p.value)
indel.data.drugs.int <- indel.data.drugs.int %>%
  select(Drug, conc, mean, sd, p.value, p.value.adjust) %>% unique()
# Run a t-test per condition (classifier) per barcode, comparing against the
# DMSO distribution of that barcode.
for (i in unique(indel.data$barcode)) {
  for (j in unique(indel.data$classifier)) {
    x <- indel.data[indel.data$Drug == "DMSO" & indel.data$barcode == i, ]
    y <- indel.data[indel.data$classifier == j & indel.data$barcode == i, ]
    # Mask for the rows that receive this combination's statistics.
    sel <- indel.data$classifier == j & indel.data$barcode == i
    if (nrow(y) > 1) {
      # Two-sample test; computed once and reused (the original recomputed
      # the identical test four times).
      tt <- t.test(x$logratio.platenorm, y$logratio.platenorm)
      indel.data$p.value[sel] <- tt$p.value
      indel.data$statistic[sel] <- tt$statistic
      indel.data$conf.int1[sel] <- tt$conf.int[1]
      indel.data$conf.int2[sel] <- tt$conf.int[2]
    }
    else {
      # Single remaining observation: one-sample test of DMSO against it.
      # NOTE(review): if nrow(y) == 0, mu = numeric(0) makes t.test() error —
      # confirm every classifier/barcode combination has at least one row.
      tt <- t.test(x$logratio.platenorm, mu = y$logratio.platenorm)
      indel.data$p.value[sel] <- tt$p.value
      indel.data$statistic[sel] <- tt$statistic
      indel.data$conf.int1[sel] <- tt$conf.int[1]
      indel.data$conf.int2[sel] <- tt$conf.int[2]
      # NOTE(review): as in the original, conf.int is (re)computed for the
      # whole frame only inside this branch — likely belongs after the loop.
      indel.data$conf.int <- abs(indel.data$conf.int2 - indel.data$conf.int1)
    }
  }
}
# Adjust p-values for multiple testing
indel.data$p.value.adjust <- p.adjust(indel.data$p.value)
# Look at specific drugs: per-barcode mean and sd of the normalized
# log-ratio across replicates, per drug and concentration.
indel.data2 <- indel.data
# FIX(review): the original grouped on indel.data — identical values since
# indel.data2 is a plain copy, but the copy is referenced for consistency.
indel.data2$logratio.mean <- ave(indel.data2$logratio.platenorm,
                                 indel.data2$barcode,
                                 indel.data2$Drug,
                                 indel.data2$conc,
                                 FUN = mean)
indel.data2$logratio.sd <- ave(indel.data2$logratio.platenorm,
                               indel.data2$barcode,
                               indel.data2$Drug,
                               indel.data2$conc,
                               FUN = sd)
indel.data2 <- indel.data2 %>% select(barcode, Drug, conc, logratio.platenorm,
                                      p.value.adjust, logratio.mean, logratio.sd) %>% unique()

# Create outlier df based on the (unadjusted) per-test p-value.
t.outlier.df <- indel.data[indel.data$p.value <= 0.05, ]
# Calculate the mean of the three replicates
indel.data$logratio.mean <-
ave(indel.data$logratio.platenorm,
indel.data$classifier, indel.data$barcode, FUN = function(x) mean(x))
indel.data$efficiency.mean <-
ave(indel.data$efficiency.platenorm,
indel.data$classifier, indel.data$barcode, FUN = function(x) mean(x))

We can see that there is a very high correlation between all 3 replicates, meaning that we can use the data of all 3 replicates.
## Df Sum Sq Mean Sq F value Pr(>F)
## integration 17 10.403 0.6120 15.84 <2e-16 ***
## Residuals 72 2.781 0.0386
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The data looks very nice, we can already spot the first outliers. Now we need to generate more sophisticated plots (like heatmaps) and integrate chromatin data to select hits.
## [1] "Run time: 2.563883 mins"
## [1] "/DATA/usr/m.trauernicht/projects/EpiScreen/code/epigenetic-screening-on-trip-clone"
## [1] "Fri May 1 09:49:13 2020"
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggpubr_0.2.5 magrittr_1.5
## [3] Laurae_0.0.0.9001 platetools_0.1.2
## [5] gghighlight_0.1.0 DESeq2_1.24.0
## [7] SummarizedExperiment_1.14.1 DelayedArray_0.10.0
## [9] BiocParallel_1.18.1 matrixStats_0.55.0
## [11] Biobase_2.44.0 GenomicRanges_1.36.1
## [13] GenomeInfoDb_1.20.0 IRanges_2.18.3
## [15] S4Vectors_0.22.1 BiocGenerics_0.30.0
## [17] GGally_1.5.0 ggrepel_0.8.1
## [19] data.table_1.12.8 ggbeeswarm_0.6.0
## [21] ggplot2_3.2.1 outliers_0.14
## [23] dplyr_0.8.5
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2
## [4] tools_3.6.3 backports_1.1.5 R6_2.4.1
## [7] rpart_4.1-15 vipor_0.4.5 Hmisc_4.3-0
## [10] DBI_1.1.0 lazyeval_0.2.2 colorspace_1.4-1
## [13] nnet_7.3-12 withr_2.1.2 tidyselect_0.2.5
## [16] gridExtra_2.3 bit_1.1-15 compiler_3.6.3
## [19] htmlTable_1.13.3 labeling_0.3 scales_1.1.0
## [22] checkmate_1.9.4 genefilter_1.66.0 stringr_1.4.0
## [25] digest_0.6.23 foreign_0.8-74 rmarkdown_2.0
## [28] XVector_0.24.0 base64enc_0.1-3 jpeg_0.1-8.1
## [31] pkgconfig_2.0.3 htmltools_0.4.0 htmlwidgets_1.5.1
## [34] rlang_0.4.5 rstudioapi_0.10 RSQLite_2.2.0
## [37] farver_2.0.1 acepack_1.4.1 RCurl_1.95-4.12
## [40] GenomeInfoDbData_1.2.1 Formula_1.2-3 Matrix_1.2-18
## [43] Rcpp_1.0.3 munsell_0.5.0 lifecycle_0.2.0
## [46] stringi_1.4.6 yaml_2.2.0 zlibbioc_1.30.0
## [49] plyr_1.8.6 grid_3.6.3 blob_1.2.0
## [52] crayon_1.3.4 lattice_0.20-38 splines_3.6.3
## [55] annotate_1.62.0 locfit_1.5-9.1 knitr_1.28
## [58] pillar_1.4.3 ggsignif_0.6.0 reshape2_1.4.4
## [61] geneplotter_1.62.0 XML_3.98-1.20 glue_1.3.1
## [64] evaluate_0.14 latticeExtra_0.6-29 png_0.1-7
## [67] vctrs_0.2.4 gtable_0.3.0 purrr_0.3.3
## [70] reshape_0.8.8 assertthat_0.2.1 xfun_0.12
## [73] xtable_1.8-4 survival_3.1-8 tibble_3.0.1
## [76] AnnotationDbi_1.46.1 beeswarm_0.2.3 memoise_1.1.0
## [79] cluster_2.1.0 ellipsis_0.3.0